* CountiesDB_proc.do
* 2014.11.01
* Last update 2014.12.02: added industry and occupation variables
* Subsets Census counties DB to useful variables and prepares for merge with QCEW data

capture log close
set more off
timer clear 1
timer on 1
clear
clear matrix
clear mata
set matsize 10000
set maxvar 32767

local work "/DIRECTORY"

log using "`work'/logs/CountiesDB_proc.log", replace
eststo clear

* Reading data
use "`work'/data/counties db/CountiesDB.dta", clear

* Keeping useful variables
rename AGE010200D pop_2000
rename AGE010210D pop_2010
rename AGE050200D median_age_2000
rename AGE050210D median_age_2010
rename AGE110200D pop_u5_2000
rename AGE110210D pop_u5_2010
rename AGE140200D pop_5to9_2000
rename AGE140210D pop_5to9_2010
rename AGE210200D pop_10to14_2000
rename AGE210210D pop_10to14_2010
rename AGE240200D pop_15to19_2000
rename AGE240210D pop_15to19_2010
rename AGE320200D pop_20to24_2000
rename AGE320210D pop_20to24_2010
rename AGE350200D pop_25to29_2000
rename AGE350210D pop_25to29_2010
rename AGE410200D pop_30to34_2000
rename AGE410210D pop_30to34_2010
rename AGE440200D pop_35to39_2000
rename AGE440210D pop_35to39_2010
rename AGE510200D pop_40to44_2000
rename AGE510210D pop_40to44_2010
rename AGE540200D pop_45to49_2000
rename AGE540210D pop_45to49_2010
rename AGE610200D pop_50to54_2000
rename AGE610210D pop_50to54_2010
rename AGE640200D pop_55to59_2000
rename AGE640210D pop_55to59_2010
rename AGE670200D pop_60to64_2000
rename AGE670210D pop_60to64_2010
rename AGE710200D pop_65to69_2000
rename AGE710210D pop_65to69_2010
rename AGE780200D pop_70to74_2000
rename AGE780210D pop_70to74_2010
rename AGE810200D pop_75to79_2000
rename AGE810210D pop_75to79_2010
rename AGE850200D pop_80to84_2000
rename AGE850210D pop_80to84_2010
rename AGE880200D pop_85plus_2000
rename AGE880210D pop_85plus_2010
gen mean_age_2000 = ((2*pop_u5_2000)+(7*pop_5to9_2000)+(12*pop_10to14_2000)+(17*pop_15to19_2000)+(22*pop_20to24_2000)+(27*pop_25to29_2000)+(32*pop_30to34_2000)+(37*pop_35to39_2000)+(42*pop_40to44_2000)+(47*pop_45to49_2000) ///
	+(52*pop_50to54_2000)+(57*pop_55to59_2000)+(62*pop_60to64_2000)+(67*pop_65to69_2000)+(72*pop_70to74_2000)+(77*pop_75to79_2000)+(82*pop_80to84_2000)+(87*pop_85plus_2000))/pop_2000
gen mean_age_2010 = ((2*pop_u5_2010)+(7*pop_5to9_2010)+(12*pop_10to14_2010)+(17*pop_15to19_2010)+(22*pop_20to24_2010)+(27*pop_25to29_2010)+(32*pop_30to34_2010)+(37*pop_35to39_2010)+(42*pop_40to44_2010)+(47*pop_45to49_2010) ///
	+(52*pop_50to54_2010)+(57*pop_55to59_2010)+(62*pop_60to64_2010)+(67*pop_65to69_2010)+(72*pop_70to74_2010)+(77*pop_75to79_2010)+(82*pop_80to84_2010)+(87*pop_85plus_2010))/pop_2010
gen pct_black_2000 = POP255200D/100
gen pct_black_2010 = POP255210D/100
gen pct_asian_2000 = POP325200D/100
gen pct_asian_2010 = POP325210D/100
gen pct_white_2000 = POP225200D/100
gen pct_white_2010 = POP225210D/100
gen pct_other_2000 = 1 - pct_black_2000 - pct_asian_2000 - pct_white_2000
gen pct_other_2010 = 1 - pct_black_2010 - pct_asian_2010 - pct_white_2010
rename SEX100200D pop_male_2000
rename SEX100210D pop_male_2010
rename SEX200200D pop_female_2000
rename SEX200210D pop_female_2010
gen pct_female_2000 = pop_female_2000/(pop_female_2000+pop_male_2000)
gen pct_female_2010 = pop_female_2010/(pop_female_2010+pop_male_2010)
rename EDU600200D pop_25plus_foredu
rename EDU610200D lessthan9thgr
rename EDU620200D gr9to12
rename EDU640200D HSgrad
rename EDU650200D some_college
rename EDU690200D BA_BS
rename EDU695200D grad_prof_degree
gen resid = pop_25plus_foredu - lessthan9thgr - gr9to12 - HSgrad - some_college - BA_BS - grad_prof_degree
summarize resid, detail
foreach x in lessthan9thgr gr9to12 HSgrad some_college BA_BS grad_prof_degree {
	gen pct_`x' = `x'/pop_25plus_foredu
}
rename EAN010201D earnings
rename PIN010200D income
rename LND110200D area /* units - mi^2 */
gen pop_density = pop_2000/area
gen fips2 = fips
rename LFE320209D denom_naics
rename LFE330209D naics_1
rename LFE340209D naics_2
rename LFE350209D naics_3
rename LFE360209D naics_4
rename LFE370209D naics_5
rename LFE380209D naics_6
rename LFE390209D naics_7
rename LFE400209D naics_8
rename LFE410209D naics_9
rename LFE420209D naics_10
rename LFE430209D naics_11
rename LFE440209D naics_12
rename LFE450209D naics_13
forval i = 1/13 {
	replace naics_`i' = naics_`i'/denom_naics
}
rename LFE610209D denom_occ
rename LFE620209D occ_1
rename LFE630209D occ_2
rename LFE640209D occ_3
rename LFE650209D occ_4
rename LFE660209D occ_5
rename LFE670209D occ_6
forval i = 1/6 {
	replace occ_`i' = occ_`i'/denom_occ
}

* Subsetting
keep fips fips2 pop_2000 pop_2010 median_* mean_* pct_* earnings income area pop_density naics_* occ_*

* Saving
save "`work'/data/counties db/CountiesDB_subset.dta", replace






timer off 1
timer list 1
capture log close


